# Ratings CSV opened by the loading code further down in this file. The path
# is relative, so it resolves against the process's current working directory.
MOVIE_RATING_PATH = "../../data/movie_rating_20190219_1.csv"

# ----------------------------- Item-based collaborative filtering -------------------------------


def eu_distance(item1, item2):
    """Return a size-normalised Euclidean distance between two items.

    Only keys present in both mappings contribute to the distance.

    Args:
        item1: Mapping of rater key -> numeric rating for the first item.
        item2: Mapping of rater key -> numeric rating for the second item.

    Returns:
        The square root of the summed squared rating differences over the
        shared keys, divided by ``(number of shared keys + 0.001)``. Smaller
        values mean more similar items. When the two items share no key at
        all, ``float("inf")`` is returned.
    """
    squared_sum = 0
    shared = 0
    for key, rating in item1.items():
        if key in item2:
            squared_sum += (rating - item2[key]) ** 2
            shared += 1
    if shared == 0:
        # No common rater means there is no evidence of similarity. Returning
        # 0 here (as 0 / 0.001 would) makes unrelated items look like perfect
        # matches and pushes them to the front of an ascending ranking.
        return float("inf")
    # The 0.001 term is kept so distances for overlapping items are unchanged.
    return (squared_sum ** 0.5) / (shared + 0.001)


def cos_distance(item1, item2):
    """Return the cosine of the angle between two items' rating vectors.

    Only keys present in both mappings are used, for the dot product and
    for both norms. Despite the function name, the value is a similarity:
    larger results mean the two items are rated more alike.

    Args:
        item1: Mapping of rater key -> numeric rating for the first item.
        item2: Mapping of rater key -> numeric rating for the second item.

    Returns:
        ``dot / (norm1 * norm2 + 0.001)``; the 0.001 keeps the denominator
        non-zero, so items without any shared key yield 0.
    """
    dot = 0
    norm_sq_1 = 0
    norm_sq_2 = 0
    for key, rating_1 in item1.items():
        if key not in item2:
            continue
        rating_2 = item2[key]
        dot += rating_1 * rating_2
        norm_sq_1 += rating_1 * rating_1
        norm_sq_2 += rating_2 * rating_2
    denominator = (norm_sq_1 ** 0.5) * (norm_sq_2 ** 0.5) + 0.001
    return dot / denominator


def top_similar(data, item, num=10):
    """Return the ``num`` items closest to ``item``.

    Closeness is measured with ``eu_distance`` and results are ordered by
    ascending distance. (``cos_distance`` returns a similarity instead, so
    using it here would need a descending sort.)

    Args:
        data: Mapping of item id -> {rater key -> rating}.
        item: Id of the reference item; it is excluded from the result.
        num: Maximum number of neighbours to return.

    Returns:
        A list of ``(item_id, distance)`` tuples, nearest first.
    """
    scored = [
        (other_id, eu_distance(data[item], ratings))
        for other_id, ratings in data.items()
        if other_id != item
    ]
    return sorted(scored, key=lambda pair: pair[1])[:num]


def getMovie(data, user, num=10):
    """Return the movies rated by ``user``, best-rated first.

    Args:
        data: Mapping of item id -> {user id -> rating}.
        user: User id to look up.
        num: Maximum number of entries to return. The special value ``-1``
            requests every movie the user has rated, as bare ids.

    Returns:
        For ``num == -1``: a list of item ids.
        Otherwise: a list of up to ``num`` ``(item_id, rating)`` tuples.
        Both are ordered by descending rating; ties keep ``data`` order.
    """
    rated = [
        (item, ratings[user])
        for item, ratings in data.items()
        if user in ratings
    ]
    # Highest rating first: callers slice the head of this list to get the
    # user's favourite movies, so an ascending sort would hand them the
    # worst-rated ones instead.
    rated.sort(key=lambda pair: pair[1], reverse=True)
    if num == -1:
        # Special request: every movie the user has watched, ids only.
        return [pair[0] for pair in rated]
    return rated[:num]


def cal_avg(data, item):
    """Return the arithmetic mean of all ratings stored for ``item``.

    Args:
        data: Mapping of item id -> {user id -> rating}.
        item: Id of the item whose ratings are averaged.

    Returns:
        Sum of the item's ratings divided by how many there are.

    Raises:
        KeyError: If ``item`` is not in ``data``.
        ZeroDivisionError: If the item has no ratings.
    """
    ratings = data[item]
    total = 0
    for rating in ratings.values():
        total += rating
    return total / len(ratings)


def recommend(data, user, item_num_plus=5, item_num_sim=5, rec_num=10):
    """Recommend unseen movies to ``user`` via item-based filtering.

    Steps:
      1. Take the first ``item_num_plus`` entries that ``getMovie`` returns
         for the user as seed movies.
      2. Collect up to ``item_num_sim`` neighbours of each seed with
         ``top_similar``, de-duplicated in first-seen order.
      3. Score each candidate by its average rating (``cal_avg``) and sort
         by that score, highest first.
      4. Drop movies the user has already rated and keep the first
         ``rec_num`` of the rest.

    Args:
        data: Mapping of item id -> {user id -> rating}.
        user: Id of the user to recommend for.
        item_num_plus: Number of seed movies taken from the user's history.
        item_num_sim: Number of neighbours fetched per seed movie.
        rec_num: Maximum number of recommendations returned.

    Returns:
        A list of ``(item_id, average_rating)`` tuples, best average first.
    """
    seed_movies = getMovie(data, user, num=item_num_plus)

    # Gather neighbours of every seed. A set handles the "already collected"
    # check in O(1); the list preserves first-seen order, which decides how
    # ties are ordered by the stable sort below.
    candidates = []
    collected = set()
    for seed in seed_movies:
        for neighbour in top_similar(data, seed[0], num=item_num_sim):
            neighbour_id = neighbour[0]
            if neighbour_id not in collected:
                collected.add(neighbour_id)
                candidates.append(neighbour_id)

    # Rank candidates by their average rating, highest first.
    scored = [(movie_id, cal_avg(data, movie_id)) for movie_id in candidates]
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Skip everything the user has already watched (set for O(1) lookups).
    watched = set(getMovie(data, user, num=-1))
    unseen = [pair for pair in scored if pair[0] not in watched]
    return unseen[:rec_num]

# ----------------------------- End of item-based collaborative filtering -------------------------------


# Load the ratings file into a nested mapping and run a sample recommendation.
# Layout of ``data``: item key -> {user key -> rating}.
data = {}
with open(MOVIE_RATING_PATH, 'r', encoding='UTF-8') as f:
    # The first line of the file is skipped (treated as a header row).
    next(f, None)
    for line in f:
        ll = line.strip().split(",")
        # Column 3 is used as the item key, column 0 as the user key and
        # column 1 as the numeric rating.
        item_ratings = data.setdefault(ll[3], {})
        item_ratings[ll[0]] = float(ll[1])

print("data imported: %s" % len(data))

print(recommend(data, "1"))
